import pandas as pd
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent


def scrape_apple_community():
    """Scrape the Apple Chinese community browse page and export topic data.

    Fetches https://discussionschinese.apple.com/browse with a randomized
    User-Agent, caches the raw HTML to 'apple_community.html', extracts each
    topic row's URL, title, and author, and writes them to 'data1.xlsx'.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the request exceeds the timeout.
    """
    url = "https://discussionschinese.apple.com/browse"
    headers = {"User-Agent": UserAgent().random}
    # timeout prevents hanging forever; raise_for_status surfaces HTTP errors
    # instead of silently parsing an error page.
    res = requests.get(url=url, headers=headers, timeout=30)
    res.raise_for_status()

    # Cache the raw HTML for debugging/reproducibility.
    with open('apple_community.html', 'wb') as f:
        f.write(res.content)

    # Parse the response directly — no need to round-trip through the file.
    soup = BeautifulSoup(res.content, 'html.parser')

    # Find all <tr> elements that hold topic data.
    data_rows = soup.find_all('tr', class_='topics-table-row')

    # Collect rows into a list and build the DataFrame once at the end;
    # pd.concat inside the loop is O(n^2).
    records = []
    for row in data_rows:
        title_link = row.find('a', class_='topic-title-link')
        author_link = row.find('a', class_='post-author-profile')
        if title_link is None or author_link is None:
            # Skip malformed rows instead of crashing on a missing anchor.
            continue
        records.append({
            'URL路径': "https://discussionschinese.apple.com" + title_link['href'],
            '帖子标题': title_link.text,
            '作者名字': author_link.text.strip(),
        })

    df = pd.DataFrame(records, columns=['URL路径', '帖子标题', '作者名字'])
    df.to_excel('data1.xlsx', index=False)


if __name__ == '__main__':
    # Entry point: run the scrape-and-export job when executed as a script.
    scrape_apple_community()
